package secondweek.one;

import org.junit.Test;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;

/**
 * Created by zhour on 2016/8/11.
 * 得到 String s="中国" 这个字符串的utf-8编码，gbk编码，iso-8859-1编码的字符串，
 * 看看各自有多少字节，同时解释为什么以utf-8编码得到的byte[]无法用gbk的方式“还原”为原来的字符串
 */
public class StringTest {

    @Test
    public void testString(){
        String s = "中国";
        systemDifferentCodeLength(s);
        systemDifferentCodeLength("t");
        systemDifferentCodeLength("1");
    }

    private void systemDifferentCodeLength(String s){
        System.out.println("string : "+s);
        byte[] utf8Byte;
        byte[] gbkByte;
        byte[] iso88591Byte;
        try {
            //UTF-8 是变长的，1-6个字节
            //utf8中数字和英文字母占1个字节，中文三个或者四个字节
            //GBK中数字和英文字母占1个字节，汉字占两个字节
            //iso8859-1 一个字节

            utf8Byte = s.getBytes("UTF-8");
            gbkByte = s.getBytes("GBK");
            iso88591Byte = s.getBytes("iso-8859-1");
            System.out.println("utf8Byte length : "+utf8Byte.length);
            System.out.println("gbkByte length : "+gbkByte.length);
            System.out.println("iso88591Byte length : "+iso88591Byte.length);

            System.out.println("utf8Byte Array : "+ Arrays.toString(utf8Byte));
            System.out.println("gbkByte Array  : "+ Arrays.toString(gbkByte));
            System.out.println("-----------------------------------------------");
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
    }
}
